### GED Data prediction

#### Configuration #####
library(dplyr)
library(randomForestSRC)
library(xgboost)
library(MLmetrics)

# Print numbers in fixed notation instead of scientific notation.
options(scipen = 999)

# Candidate project directories, most preferred first. The script used to call
# setwd() on both paths one after the other, which made the "Backup" folder
# the effective working directory and aborted whenever either path was
# missing. "Backup" therefore stays the preferred choice; the external drive
# is only a fallback.
project.dirs <- c(
  "C:/Users/Felix Ettensperger/Desktop/Views Competition/Backup",
  "F:/Views Competition/Update September 2020"
)
project.dir <- project.dirs[dir.exists(project.dirs)][1]
if (is.na(project.dir)) {
  stop("None of the project directories exists: ",
       paste(project.dirs, collapse = "; "), call. = FALSE)
}
setwd(project.dir)

# memory.size() only *reports* memory usage (its argument is a logical flag),
# so memory.size(32000) never raised any limit. memory.limit(size = ) is the
# call that requests a larger limit (in Mb). It is Windows-only and a stub
# from R 4.2.0 onwards, hence the guard.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit(size = 32000)
}


###### Load Data #####
# Read the GED data from the working directory and keep only the rows whose
# in_africa flag equals 1.
views.ged <- read.csv(file = "views_ged.csv", header = TRUE) %>%
  filter(in_africa == 1)

# Untouched copy; used further down to restore the unscaled values.
backup.ged <- views.ged


## Scaling toggle: run the "Scale" lines only for the XGBoost runs, after the
## RF versions have been fitted ####

# Scale: centre and scale columns 7:48 (the columns used as predictors below)
views.xgb <- views.ged
views.xgb[, 7:48] <- scale(views.xgb[, 7:48])
views.ged <- views.xgb

# Reset to unscaled data. Executed top to bottom this undoes the scaling of
# views.ged again; views.xgb keeps the scaled copy.
views.ged <- backup.ged


## LEARN - PREDICT SETUP for 3 batches of forecasts: #####


### Task 2: January 2017 - Dec 2019 Forecasts (m_id 445-480) ####



# Learn/predict windows for Task 2, one pair per forecast step. Every pair
# overwrites views.ged.learn.t2 / views.ged.pred.t2, so only the pair of the
# step currently being modelled should be run; executed top to bottom, the
# s-7 pair is the one left in effect.

# s-1: learn on months 114-443, prediction set is 444-479
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 443)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 444, month_id <= 479)

# s-2: learn on months 114-442, prediction set is 443-478
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 442)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 443, month_id <= 478)

# s-3: learn on months 114-441, prediction set is 442-477
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 441)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 442, month_id <= 477)

# s-4: learn on months 114-440, prediction set is 441-476
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 440)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 441, month_id <= 476)

# s-5: learn on months 114-439, prediction set is 440-475
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 439)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 440, month_id <= 475)

# s-6: learn on months 114-438, prediction set is 439-474
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 438)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 439, month_id <= 474)

# s-7: learn on months 114-437, prediction set is 438-473
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 437)
views.ged.pred.t2  <- views.ged %>% filter(month_id >= 438, month_id <= 473)


#+#


### Task 3: January 2014 - Dec 2016 Forecasts (m_id 409-444) ####

# Learn/predict windows for Task 3, one pair per forecast step. Every pair
# overwrites views.ged.learn.t3 / views.ged.pred.t3, so only the pair of the
# step currently being modelled should be run; executed top to bottom, the
# s-7 pair is the one left in effect.

# s-1: learn on months 114-407, prediction set is 408-443
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 407)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 408, month_id <= 443)

# s-2: learn on months 114-406, prediction set is 407-442
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 406)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 407, month_id <= 442)

# s-3: learn on months 114-405, prediction set is 406-441
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 405)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 406, month_id <= 441)

# s-4: learn on months 114-404, prediction set is 405-440
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 404)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 405, month_id <= 440)

# s-5: learn on months 114-403, prediction set is 404-439
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 403)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 404, month_id <= 439)

# s-6: learn on months 114-402, prediction set is 403-438
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 402)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 403, month_id <= 438)

# s-7: learn on months 114-401, prediction set is 402-437
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 401)
views.ged.pred.t3  <- views.ged %>% filter(month_id >= 402, month_id <= 437)


#+#

### Task 1: True forecasts Oct 2020 - March 2021 (m_id 490-495) ####
# One learn/predict pair per forecast step. Every pair overwrites
# ged.learn.true / ged.pred.true, so only the pair of the step currently being
# modelled should be run; executed top to bottom, the s7 pair stays in effect.

### October 2020, s2 (learning is only possible up to month 486)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 486)
ged.pred.true  <- views.ged %>% filter(month_id == 488)

### November 2020, s3 (learning is only possible up to month 485)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 485)
ged.pred.true  <- views.ged %>% filter(month_id >= 487, month_id <= 488)

### December 2020, s4 (learning is only possible up to month 484)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 484)
ged.pred.true  <- views.ged %>% filter(month_id >= 486, month_id <= 488)

### January 2021, s5 (learning is only possible up to month 483)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 483)
ged.pred.true  <- views.ged %>% filter(month_id >= 485, month_id <= 488)

### February 2021, s6 (learning is only possible up to month 482)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 482)
ged.pred.true  <- views.ged %>% filter(month_id >= 484, month_id <= 488)

### March 2021, s7 (learning is only possible up to month 481)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 481)
ged.pred.true  <- views.ged %>% filter(month_id >= 483, month_id <= 488)


#+#+#+#+#+#+#
# RF Predictions #####
#+#+#+#+#+#+#

##### RF.01 prediction test ######
# Trial forest with 75 trees: response ln_ged_best_sb_s2, predictors taken
# from columns 7:48, missing values imputed (column 57 is expected to hold the
# response).
# NOTE(review): views.ged.learn and views.ged.pred are not created anywhere in
# this file; they have to exist in the workspace already.
rf.ged.01 <- rfsrc(
  formula   = ln_ged_best_sb_s2 ~ .,
  data      = views.ged.learn[, c(7:48, 57)],
  ntree     = 75,
  na.action = "na.impute"
)

# Print the fitted forest.
rf.ged.01

# Predict for the hold-out rows, imputing missing values there as well.
prediction.ged.01 <- predict(
  object    = rf.ged.01,
  newdata   = views.ged.pred,
  na.action = "na.impute"
)

# Print the prediction object.
prediction.ged.01
#+#+# #####

#### RF Task 2 - s1-7 Forecasts #####
# One random forest (350 trees, missing values imputed) per forecast step.
# Each forest uses columns 7:48 as predictors and the step-specific target
# ln_ged_best_sb_sN, expected in columns 56 (s1) to 62 (s7).
# All forests are fitted on whatever views.ged.learn.t2 currently holds.
rf.t2.ged.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t2[, c(7:48, 56)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t2[, c(7:48, 57)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t2[, c(7:48, 58)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t2[, c(7:48, 59)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t2[, c(7:48, 60)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t2[, c(7:48, 61)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t2[, c(7:48, 62)],
                      ntree = 350, na.action = "na.impute")

# Print the fitted forests.
rf.t2.ged.s1
rf.t2.ged.s2
rf.t2.ged.s3
rf.t2.ged.s4
rf.t2.ged.s5
rf.t2.ged.s6
rf.t2.ged.s7

## Predict Task 2
pr.t2.ged.s1 <- predict(rf.t2.ged.s1, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s2 <- predict(rf.t2.ged.s2, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s3 <- predict(rf.t2.ged.s3, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s4 <- predict(rf.t2.ged.s4, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s5 <- predict(rf.t2.ged.s5, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s6 <- predict(rf.t2.ged.s6, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s7 <- predict(rf.t2.ged.s7, views.ged.pred.t2, na.action = "na.impute")

# Print the prediction objects.
pr.t2.ged.s1
pr.t2.ged.s2
pr.t2.ged.s3
pr.t2.ged.s4
pr.t2.ged.s5
pr.t2.ged.s6
pr.t2.ged.s7

# Free memory only now: the forests used to be removed *before* predict()
# needed them. As before, the s7 forest is kept.
rm(rf.t2.ged.s1, rf.t2.ged.s2, rf.t2.ged.s3,
   rf.t2.ged.s4, rf.t2.ged.s5, rf.t2.ged.s6)

# The prediction objects pr.t2.ged.s1 ... pr.t2.ged.s7 are deliberately not
# removed here: the Task 2 collection code further down reads
# pr.t2.ged.sN[["predicted"]].

#### RF Task 3 - s1-7 Forecasts #####
# One random forest (350 trees, missing values imputed) per forecast step.
# Each forest uses columns 7:48 as predictors and the step-specific target
# ln_ged_best_sb_sN, expected in columns 56 (s1) to 62 (s7).
# All forests are fitted on whatever views.ged.learn.t3 currently holds.
rf.t3.ged.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t3[, c(7:48, 56)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t3[, c(7:48, 57)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t3[, c(7:48, 58)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t3[, c(7:48, 59)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t3[, c(7:48, 60)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t3[, c(7:48, 61)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t3[, c(7:48, 62)],
                      ntree = 350, na.action = "na.impute")

# Print the fitted forests.
rf.t3.ged.s1
rf.t3.ged.s2
rf.t3.ged.s3
rf.t3.ged.s4
rf.t3.ged.s5
rf.t3.ged.s6
rf.t3.ged.s7

## Predict Task 3
pr.t3.ged.s1 <- predict(rf.t3.ged.s1, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s2 <- predict(rf.t3.ged.s2, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s3 <- predict(rf.t3.ged.s3, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s4 <- predict(rf.t3.ged.s4, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s5 <- predict(rf.t3.ged.s5, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s6 <- predict(rf.t3.ged.s6, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s7 <- predict(rf.t3.ged.s7, views.ged.pred.t3, na.action = "na.impute")

# Print the prediction objects.
pr.t3.ged.s1
pr.t3.ged.s2
pr.t3.ged.s3
pr.t3.ged.s4
pr.t3.ged.s5
pr.t3.ged.s6
pr.t3.ged.s7

# Free memory only now: the forests used to be removed *before* they were
# printed and before predict() needed them. As before, the s7 forest is kept.
rm(rf.t3.ged.s1, rf.t3.ged.s2, rf.t3.ged.s3,
   rf.t3.ged.s4, rf.t3.ged.s5, rf.t3.ged.s6)

# The prediction objects pr.t3.ged.s1 ... pr.t3.ged.s7 are deliberately not
# removed here: the Task 3 collection code further down reads
# pr.t3.ged.sN[["predicted"]].





### RF Task 1 - True 2020-21 predictions #####
# One random forest (350 trees, missing values imputed) per forecast step
# s2-s7, fitted on whatever ged.learn.true currently holds. Predictors are
# columns 7:48; the step-specific target is expected in columns 57 (s2) to
# 62 (s7).

### True S2 - October 2020
rf.t1.ged.s2 <- rfsrc(
  formula = ln_ged_best_sb_s2 ~ ., data = ged.learn.true[, c(7:48, 57)],
  ntree = 350, na.action = "na.impute"
)

### True S3 - Oct & Nov 2020
rf.t1.ged.s3 <- rfsrc(
  formula = ln_ged_best_sb_s3 ~ ., data = ged.learn.true[, c(7:48, 58)],
  ntree = 350, na.action = "na.impute"
)

### True S4 - Oct & Nov & Dec 2020
rf.t1.ged.s4 <- rfsrc(
  formula = ln_ged_best_sb_s4 ~ ., data = ged.learn.true[, c(7:48, 59)],
  ntree = 350, na.action = "na.impute"
)

### True S5
rf.t1.ged.s5 <- rfsrc(
  formula = ln_ged_best_sb_s5 ~ ., data = ged.learn.true[, c(7:48, 60)],
  ntree = 350, na.action = "na.impute"
)

### True S6
rf.t1.ged.s6 <- rfsrc(
  formula = ln_ged_best_sb_s6 ~ ., data = ged.learn.true[, c(7:48, 61)],
  ntree = 350, na.action = "na.impute"
)

### True S7
rf.t1.ged.s7 <- rfsrc(
  formula = ln_ged_best_sb_s7 ~ ., data = ged.learn.true[, c(7:48, 62)],
  ntree = 350, na.action = "na.impute"
)

# Print the fitted forests.
rf.t1.ged.s2
rf.t1.ged.s3
rf.t1.ged.s4
rf.t1.ged.s5
rf.t1.ged.s6
rf.t1.ged.s7

# Predict for the rows currently held in ged.pred.true, imputing missing
# values.
pr.t1.ged.s2 <- predict(object = rf.t1.ged.s2, newdata = ged.pred.true,
                        na.action = "na.impute")
pr.t1.ged.s3 <- predict(object = rf.t1.ged.s3, newdata = ged.pred.true,
                        na.action = "na.impute")
pr.t1.ged.s4 <- predict(object = rf.t1.ged.s4, newdata = ged.pred.true,
                        na.action = "na.impute")
pr.t1.ged.s5 <- predict(object = rf.t1.ged.s5, newdata = ged.pred.true,
                        na.action = "na.impute")
pr.t1.ged.s6 <- predict(object = rf.t1.ged.s6, newdata = ged.pred.true,
                        na.action = "na.impute")
pr.t1.ged.s7 <- predict(object = rf.t1.ged.s7, newdata = ged.pred.true,
                        na.action = "na.impute")

##### XG Boost prediction #####
## Scaling for XGBoost ####
# Drop a learning set left over from an earlier run (rm() only warns when the
# object does not exist).
rm(views.learn.xgb)

# Scaled copy of the data: columns 7:48 are centred and scaled.
views.xgb <- views.ged
views.xgb[, 7:48] <- scale(views.xgb[, 7:48])

views.learn.xgb <- views.xgb %>% filter(month_id >= 114, month_id <= 442)
views.pred.xgb  <- views.xgb %>% filter(month_id >= 443, month_id <= 478)
#####


### s2 - month 490
views.learn.xgb <- views.xgb %>% filter(month_id >= 114, month_id <= 486)
views.pred.xgb  <- views.xgb %>% filter(month_id == 488)

# Drop every row that contains a missing value.
views.learn.xgb <- na.omit(views.learn.xgb)
views.pred.xgb  <- na.omit(views.pred.xgb)
#####

# The three alternatives below overwrite views.learn.xgb / views.pred.xgb with
# the learn/predict sets of one task; run only the pair for the task being
# modelled (executed top to bottom, Task 3 stays in effect). Only the learning
# set is passed through na.omit(). These frames derive from views.ged, so they
# are scaled only if the scaling toggle near the top was left switched on.

#### NA omit for Task 1 ####
views.learn.xgb <- na.omit(ged.learn.true)
views.pred.xgb  <- ged.pred.true

#### NA omit for Task 2 ####
views.learn.xgb <- na.omit(views.ged.learn.t2)
views.pred.xgb  <- views.ged.pred.t2

#### NA omit for Task 3 ####
views.learn.xgb <- na.omit(views.ged.learn.t3)
views.pred.xgb  <- views.ged.pred.t3

#####


## All tasks share the objects below for XGBoost - careful!
#### Convert the current learn/predict sets into feature and label matrices
#*'!!!'*# ADJUST the label column to the step being modelled! Column 62 is the
#*'!!!'*# column the RF section pairs with ln_ged_best_sb_s7.

train.data.xgb  <- as.matrix(views.learn.xgb[, 7:48])
train.label.xgb <- as.matrix(views.learn.xgb[, 62])
test.data.xgb   <- as.matrix(views.pred.xgb[, 7:48])
test.label.xgb  <- as.matrix(views.pred.xgb[, 62])



# All four calls fit a boosted regression model on the matrices
# train.data.xgb / train.label.xgb as they are at the time of the call, so the
# task and step have to be selected before running one of them.
#
# "reg:squarederror" is the current name of the squared-error objective;
# "reg:linear" is its deprecated alias and triggers a deprecation warning.
# `nrounds` and `max_depth` are spelled out instead of relying on partial
# argument matching (`nround`) and dot-to-underscore renaming (`max.depth`).

### XGBoost train ####
xgb.001 <- xgboost(data = train.data.xgb, label = train.label.xgb,
                   objective = "reg:squarederror",
                   eval_metric = "rmse",
                   max_depth = 25,
                   eta = 0.01,
                   nrounds = 750,
                   subsample = 0.5,
                   colsample_bytree = 0.5,
                   nthread = 3
)

### TASK 1 XGBoost Train old ####
xgb.t1 <- xgboost(data = train.data.xgb, label = train.label.xgb,
                  objective = "reg:squarederror",
                  eval_metric = "rmse",
                  max_depth = 25,
                  eta = 0.01,
                  nrounds = 950,
                  subsample = 0.5,
                  colsample_bytree = 0.5,
                  nthread = 3
)

### Task 2 XGBoost Train ####
xgb.t2 <- xgboost(data = train.data.xgb, label = train.label.xgb,
                  objective = "reg:squarederror",
                  eval_metric = "rmse",
                  max_depth = 25,
                  eta = 0.01,
                  nrounds = 950,
                  subsample = 0.5,
                  colsample_bytree = 0.5,
                  nthread = 3
)

### Task 3 XGBoost Train ####
xgb.t3 <- xgboost(data = train.data.xgb, label = train.label.xgb,
                  objective = "reg:squarederror",
                  eval_metric = "rmse",
                  max_depth = 25,
                  eta = 0.01,
                  nrounds = 950,
                  subsample = 0.5,
                  colsample_bytree = 0.5,
                  nthread = 3
)


#####

# Feature importance and predictions for each fitted XGBoost model. Every
# predict() call scores test.data.xgb as it is at the time of the call.
# `reshape = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
# NOTE(review): the step suffix in the importance object names and file names
# (s3 / s1) is fixed text; confirm it matches the label column used for
# training before relying on the file names.

## Test #####
importance.xgb.001 <- xgb.importance(model = xgb.001)

xgb.pre.001 <- predict(xgb.001, test.data.xgb, reshape = TRUE)
xgb.pre.001.frame <- as.data.frame(xgb.pre.001)
# Keep the observed labels next to the predictions.
xgb.pre.001.frame$real <- test.label.xgb



## Task 1 #####
importance.xgb.t1.s3 <- xgb.importance(model = xgb.t1)
write.csv(importance.xgb.t1.s3, file = "importance_task1_s3_ged.csv",
          row.names = FALSE)

xgb.pre.t1 <- predict(xgb.t1, test.data.xgb, reshape = TRUE)
xgb.pre.t1.frame <- as.data.frame(xgb.pre.t1)


## Task 2 #####
importance.xgb.t2.s3 <- xgb.importance(model = xgb.t2)
write.csv(importance.xgb.t2.s3, file = "importance_task2_s3_ged.csv",
          row.names = FALSE)

xgb.pre.t2 <- predict(xgb.t2, test.data.xgb, reshape = TRUE)
xgb.pre.t2.frame <- as.data.frame(xgb.pre.t2)


## Task 3 #####
importance.xgb.t3.s1 <- xgb.importance(model = xgb.t3)
write.csv(importance.xgb.t3.s1, file = "importance_task3_s3_ged.csv",
          row.names = FALSE)

xgb.pre.t3 <- predict(xgb.t3, test.data.xgb, reshape = TRUE)
xgb.pre.t3.frame <- as.data.frame(xgb.pre.t3)


#+#+#+#+#

#### Collect Prediction Results in Dataframe #####
# Put the trial RF and XGBoost predictions side by side, add identifiers and
# the observed response, save the table and compare the errors.
# NOTE(review): views.ged.pred is not created anywhere in this file.

Pred.Ensemble.GED <- setNames(
  as.data.frame(prediction.ged.01[["predicted"]]),
  "RF.01"
)

Pred.Ensemble.GED$XGBoost.01 <- xgb.pre.001.frame$xgb.pre.001

# (disabled) RF.pred.compare2$real.sb <- views.pred.small$ln_ged_best_sb

# Identifiers come from the prediction rows; month_id is moved forward by 2.
Pred.Ensemble.GED$month_id     <- views.ged.pred$month_id + 2
Pred.Ensemble.GED$country_id   <- views.ged.pred$country_id
Pred.Ensemble.GED$country_name <- views.ged.pred$country_name
Pred.Ensemble.GED$real         <- prediction.ged.01[["yvar"]]

write.csv(Pred.Ensemble.GED, file = "Pred_Ensemble_GED.csv",
          row.names = FALSE)


## MSE test: each model against the observed values
MSE(y_pred = Pred.Ensemble.GED$RF.01, y_true = Pred.Ensemble.GED$real)
MSE(y_pred = Pred.Ensemble.GED$XGBoost.01, y_true = Pred.Ensemble.GED$real)

## sync test: the two models against each other
MSE(y_pred = Pred.Ensemble.GED$XGBoost.01, y_true = Pred.Ensemble.GED$RF.01)

#####


#### Collect Prediction Results in Dataframe for Task 3: 2014-2016 predictions #####
# Same recipe for every step s1-s7:
#   1. read the previously saved ensemble table of that step,
#   2. attach column 6 of views.ged.pred.t3 as in_africa and keep the rows
#      where it equals 1,
#   3. append the RF and XGBoost predictions as new columns,
#   4. write the result to a "*_africa.csv" file.
# Every step appends the same xgb.pre.t3.frame$xgb.pre.t3, i.e. whatever
# XGBoost prediction is current when the stanza is run.

## S-1
Pred.Ensemble.Task3.s1 <- read.csv("Pred_Ensemble_Task3_s1.csv", header = TRUE)

Pred.Ensemble.Task3.s1.africa <- Pred.Ensemble.Task3.s1
Pred.Ensemble.Task3.s1.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s1.africa <- Pred.Ensemble.Task3.s1.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s1.africa$RF.GED.t3.s1.africa  <- pr.t3.ged.s1[["predicted"]]
Pred.Ensemble.Task3.s1.africa$XGB.GED.t3.s1.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s1.africa,
          file = "Pred_Ensemble_Task3_s1_africa.csv", row.names = FALSE)


## S-2
Pred.Ensemble.Task3.s2 <- read.csv("Pred_Ensemble_Task3_s2.csv", header = TRUE)

Pred.Ensemble.Task3.s2.africa <- Pred.Ensemble.Task3.s2
Pred.Ensemble.Task3.s2.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s2.africa <- Pred.Ensemble.Task3.s2.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s2.africa$RF.GED.t3.s2.africa  <- pr.t3.ged.s2[["predicted"]]
Pred.Ensemble.Task3.s2.africa$XGB.GED.t3.s2.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s2.africa,
          file = "Pred_Ensemble_Task3_s2_africa.csv", row.names = FALSE)


## S-3
Pred.Ensemble.Task3.s3 <- read.csv("Pred_Ensemble_Task3_s3.csv", header = TRUE)

Pred.Ensemble.Task3.s3.africa <- Pred.Ensemble.Task3.s3
Pred.Ensemble.Task3.s3.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s3.africa <- Pred.Ensemble.Task3.s3.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s3.africa$RF.GED.t3.s3.africa  <- pr.t3.ged.s3[["predicted"]]
Pred.Ensemble.Task3.s3.africa$XGB.GED.t3.s3.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s3.africa,
          file = "Pred_Ensemble_Task3_s3_africa.csv", row.names = FALSE)


## S-4
Pred.Ensemble.Task3.s4 <- read.csv("Pred_Ensemble_Task3_s4.csv", header = TRUE)

Pred.Ensemble.Task3.s4.africa <- Pred.Ensemble.Task3.s4
Pred.Ensemble.Task3.s4.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s4.africa <- Pred.Ensemble.Task3.s4.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s4.africa$RF.GED.t3.s4.africa  <- pr.t3.ged.s4[["predicted"]]
Pred.Ensemble.Task3.s4.africa$XGB.GED.t3.s4.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s4.africa,
          file = "Pred_Ensemble_Task3_s4_africa.csv", row.names = FALSE)


## S-5
Pred.Ensemble.Task3.s5 <- read.csv("Pred_Ensemble_Task3_s5.csv", header = TRUE)

Pred.Ensemble.Task3.s5.africa <- Pred.Ensemble.Task3.s5
Pred.Ensemble.Task3.s5.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s5.africa <- Pred.Ensemble.Task3.s5.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s5.africa$RF.GED.t3.s5.africa  <- pr.t3.ged.s5[["predicted"]]
Pred.Ensemble.Task3.s5.africa$XGB.GED.t3.s5.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s5.africa,
          file = "Pred_Ensemble_Task3_s5_africa.csv", row.names = FALSE)


## S-6
Pred.Ensemble.Task3.s6 <- read.csv("Pred_Ensemble_Task3_s6.csv", header = TRUE)

Pred.Ensemble.Task3.s6.africa <- Pred.Ensemble.Task3.s6
Pred.Ensemble.Task3.s6.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s6.africa <- Pred.Ensemble.Task3.s6.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s6.africa$RF.GED.t3.s6.africa  <- pr.t3.ged.s6[["predicted"]]
Pred.Ensemble.Task3.s6.africa$XGB.GED.t3.s6.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s6.africa,
          file = "Pred_Ensemble_Task3_s6_africa.csv", row.names = FALSE)


## S-7
Pred.Ensemble.Task3.s7 <- read.csv("Pred_Ensemble_Task3_s7.csv", header = TRUE)

Pred.Ensemble.Task3.s7.africa <- Pred.Ensemble.Task3.s7
Pred.Ensemble.Task3.s7.africa$in_africa <- views.ged.pred.t3[, 6]

Pred.Ensemble.Task3.s7.africa <- Pred.Ensemble.Task3.s7.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task3.s7.africa$RF.GED.t3.s7.africa  <- pr.t3.ged.s7[["predicted"]]
Pred.Ensemble.Task3.s7.africa$XGB.GED.t3.s7.africa <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s7.africa,
          file = "Pred_Ensemble_Task3_s7_africa.csv", row.names = FALSE)




#### Collect Prediction Results in Dataframe for Task 2: 2017-2019 predictions #####
# Same recipe for every step:
#   1. read the previously saved ensemble table of that step,
#   2. overwrite the listed table columns (by position) with columns taken
#      from views.ged.pred.t2,
#   3. for s1-s6 move month_id forward by the step length,
#   4. keep the rows with in_africa == 1,
#   5. append the RF and XGBoost predictions and write a "*_africa.csv" file.
# Author's note for the positional copy: use pred without africa dummy!
# Every step appends the same xgb.pre.t2.frame$xgb.pre.t2, i.e. whatever
# XGBoost prediction is current when the stanza is run.

## S-1
Pred.Ensemble.Task2.s1 <- read.csv("Pred_Ensemble_Task2_s1.csv", header = TRUE)

Pred.Ensemble.Task2.s1.africa <- Pred.Ensemble.Task2.s1
Pred.Ensemble.Task2.s1.africa[, 5:8] <- views.ged.pred.t2[, c(2:3, 5:6)]
Pred.Ensemble.Task2.s1.africa$month_id <- Pred.Ensemble.Task2.s1.africa$month_id + 1
Pred.Ensemble.Task2.s1.africa <- Pred.Ensemble.Task2.s1.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task2.s1.africa$RF.GED.t2.s1.africa  <- pr.t2.ged.s1[["predicted"]]
Pred.Ensemble.Task2.s1.africa$XGB.GED.t2.s1.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s1.africa,
          file = "Pred_Ensemble_Task2_s1_africa.csv", row.names = FALSE)


## s-2
Pred.Ensemble.Task2.s2 <- read.csv("Pred_Ensemble_Task2_s2.csv", header = TRUE)

Pred.Ensemble.Task2.s2.africa <- Pred.Ensemble.Task2.s2
Pred.Ensemble.Task2.s2.africa[, 5:8] <- views.ged.pred.t2[, c(2:3, 5:6)]
Pred.Ensemble.Task2.s2.africa$month_id <- Pred.Ensemble.Task2.s2.africa$month_id + 2
Pred.Ensemble.Task2.s2.africa <- Pred.Ensemble.Task2.s2.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task2.s2.africa$RF.GED.t2.s2.africa  <- pr.t2.ged.s2[["predicted"]]
Pred.Ensemble.Task2.s2.africa$XGB.GED.t2.s2.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s2.africa,
          file = "Pred_Ensemble_Task2_s2_africa.csv", row.names = FALSE)


## s-3
Pred.Ensemble.Task2.s3 <- read.csv("Pred_Ensemble_Task2_s3.csv", header = TRUE)

Pred.Ensemble.Task2.s3.africa <- Pred.Ensemble.Task2.s3
Pred.Ensemble.Task2.s3.africa[, 5:8] <- views.ged.pred.t2[, c(2:3, 5:6)]
Pred.Ensemble.Task2.s3.africa$month_id <- Pred.Ensemble.Task2.s3.africa$month_id + 3
Pred.Ensemble.Task2.s3.africa <- Pred.Ensemble.Task2.s3.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task2.s3.africa$RF.GED.t2.s3.africa  <- pr.t2.ged.s3[["predicted"]]
Pred.Ensemble.Task2.s3.africa$XGB.GED.t2.s3.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s3.africa,
          file = "Pred_Ensemble_Task2_s3_africa.csv", row.names = FALSE)


## s-4
Pred.Ensemble.Task2.s4 <- read.csv("Pred_Ensemble_Task2_s4.csv", header = TRUE)

Pred.Ensemble.Task2.s4.africa <- Pred.Ensemble.Task2.s4
Pred.Ensemble.Task2.s4.africa[, 5:8] <- views.ged.pred.t2[, c(2:3, 5:6)]
Pred.Ensemble.Task2.s4.africa$month_id <- Pred.Ensemble.Task2.s4.africa$month_id + 4
Pred.Ensemble.Task2.s4.africa <- Pred.Ensemble.Task2.s4.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task2.s4.africa$RF.GED.t2.s4.africa  <- pr.t2.ged.s4[["predicted"]]
Pred.Ensemble.Task2.s4.africa$XGB.GED.t2.s4.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s4.africa,
          file = "Pred_Ensemble_Task2_s4_africa.csv", row.names = FALSE)


## s-5 (additionally copies column 60 of the prediction frame into column 9)
Pred.Ensemble.Task2.s5 <- read.csv("Pred_Ensemble_Task2_s5.csv", header = TRUE)

Pred.Ensemble.Task2.s5.africa <- Pred.Ensemble.Task2.s5
Pred.Ensemble.Task2.s5.africa[, 5:9] <- views.ged.pred.t2[, c(2:3, 5:6, 60)]
Pred.Ensemble.Task2.s5.africa$month_id <- Pred.Ensemble.Task2.s5.africa$month_id + 5
Pred.Ensemble.Task2.s5.africa <- Pred.Ensemble.Task2.s5.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task2.s5.africa$RF.GED.t2.s5.africa  <- pr.t2.ged.s5[["predicted"]]
Pred.Ensemble.Task2.s5.africa$XGB.GED.t2.s5.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s5.africa,
          file = "Pred_Ensemble_Task2_s5_africa.csv", row.names = FALSE)

## s-6 (additionally copies column 61 of the prediction frame into column 9)
Pred.Ensemble.Task2.s6 <- read.csv("Pred_Ensemble_Task2_s6.csv", header = TRUE)

Pred.Ensemble.Task2.s6.africa <- Pred.Ensemble.Task2.s6
Pred.Ensemble.Task2.s6.africa[, 5:9] <- views.ged.pred.t2[, c(2:3, 5:6, 61)]
Pred.Ensemble.Task2.s6.africa$month_id <- Pred.Ensemble.Task2.s6.africa$month_id + 6
Pred.Ensemble.Task2.s6.africa <- Pred.Ensemble.Task2.s6.africa %>%
  filter(in_africa == 1)
Pred.Ensemble.Task2.s6.africa$RF.GED.t2.s6.africa  <- pr.t2.ged.s6[["predicted"]]
Pred.Ensemble.Task2.s6.africa$XGB.GED.t2.s6.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s6.africa,
          file = "Pred_Ensemble_Task2_s6_africa.csv", row.names = FALSE)

## s-7 (only columns 8:9 are overwritten, from columns 6 and 62 of the
## prediction frame; month_id is not shifted in this stanza)
Pred.Ensemble.Task2.s7 <- read.csv("Pred_Ensemble_Task2_s7.csv", header = TRUE)

Pred.Ensemble.Task2.s7.africa <- Pred.Ensemble.Task2.s7
Pred.Ensemble.Task2.s7.africa[, 8:9] <- views.ged.pred.t2[, c(6, 62)]

Pred.Ensemble.Task2.s7.africa <- Pred.Ensemble.Task2.s7.africa %>%
  filter(in_africa == 1)
Pred.Ensemble.Task2.s7.africa$RF.GED.t2.s7.africa  <- pr.t2.ged.s7[["predicted"]]
Pred.Ensemble.Task2.s7.africa$XGB.GED.t2.s7.africa <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s7.africa,
          file = "Pred_Ensemble_Task2_s7_africa.csv", row.names = FALSE)



###
## Add observed values to s1 - s4
# Trim the s1 and s2 tables to their first ten columns.
Pred.Ensemble.Task2.s1.africa <- Pred.Ensemble.Task2.s1.africa[, 1:10]
Pred.Ensemble.Task2.s2.africa <- Pred.Ensemble.Task2.s2.africa[, 1:10]

### Make sure the matching prediction set is loaded first
# Columns 56-59 of the prediction frame are the columns the RF section pairs
# with the s1-s4 targets.
Pred.Ensemble.Task2.s1.africa$real <- views.ged.pred.t2[, 56]
Pred.Ensemble.Task2.s2.africa$real <- views.ged.pred.t2[, 57]
Pred.Ensemble.Task2.s3.africa$real <- views.ged.pred.t2[, 58]
Pred.Ensemble.Task2.s4.africa$real <- views.ged.pred.t2[, 59]

## Check MSE of every model column against the observed values.
## To evaluate another step, adjust all sX occurrences below.
## MSEs

MSE(y_pred = Pred.Ensemble.Task2.s1.africa$RF.GED.t2.s1,
    y_true = Pred.Ensemble.Task2.s1.africa$real)
MSE(y_pred = Pred.Ensemble.Task2.s1.africa$XGB.GED.t2.s1,
    y_true = Pred.Ensemble.Task2.s1.africa$real)
MSE(y_pred = Pred.Ensemble.Task2.s1.africa$RF.United.t2.s1,
    y_true = Pred.Ensemble.Task2.s1.africa$real)
MSE(y_pred = Pred.Ensemble.Task2.s1.africa$XGB.United.t2.s1,
    y_true = Pred.Ensemble.Task2.s1.africa$real)
MSE(y_pred = Pred.Ensemble.Task2.s1.africa$RF.GED.t2.s1.africa,
    y_true = Pred.Ensemble.Task2.s1.africa$real)
MSE(y_pred = Pred.Ensemble.Task2.s1.africa$XGB.GED.t2.s1.africa,
    y_true = Pred.Ensemble.Task2.s1.africa$real)


### Merge predictions: unweighted row mean over table columns 1-4 and 9-10
Pred.Ensemble.Task2.s1.africa$Ensemble.forecast.t2.s1 <-
  rowMeans(Pred.Ensemble.Task2.s1.africa[, c(1:4, 9:10)])


MSE(y_pred = Pred.Ensemble.Task2.s1.africa$Ensemble.forecast.t2.s1,
    y_true = Pred.Ensemble.Task2.s1.africa$real)





#+#


#### Collect Prediction Results in Dataframe for Task 1: 2020-21 predictions #####
# For s3 and s4 the base table is first built from the current RF and XGBoost
# predictions and saved; for s2 and s5-s7 it is read from an existing CSV.
# Each step then gets an Africa-only variant: the in_africa flag is taken from
# ged.pred.true, rows with in_africa == 1 are kept, the RF prediction is
# appended and the table is written to a "*_africa.csv" file.

## add Africa only s-2
Pred.Ensemble.Task1.s2 <- read.csv("Pred_Ensemble_Task1_s2.csv", header = TRUE)


Pred.Ensemble.Task1.s2.africa <- Pred.Ensemble.Task1.s2
# Table columns 9:11 are overwritten with columns 2, 3 and 6 of ged.pred.true.
Pred.Ensemble.Task1.s2.africa[, 9:11] <- ged.pred.true[, c(2:3, 6)]
Pred.Ensemble.Task1.s2.africa <- Pred.Ensemble.Task1.s2.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s2.africa$RF.GED.t1.s2.africa <- pr.t1.ged.s2[["predicted"]]

write.csv(Pred.Ensemble.Task1.s2.africa,
          file = "Pred_Ensemble_Task1_s2_africa.csv", row.names = FALSE)

## s-3
Pred.Ensemble.Task1.s3 <- setNames(
  as.data.frame(pr.t1.ged.s3[["predicted"]]),
  "RF.GED.t1.s3"
)

Pred.Ensemble.Task1.s3$XGB.GED.t1.s3 <- xgb.pre.t1.frame$xgb.pre.t1

# Identifiers come from the prediction rows; month_id is moved forward by 3.
Pred.Ensemble.Task1.s3$month_id     <- ged.pred.true$month_id + 3
Pred.Ensemble.Task1.s3$country_id   <- ged.pred.true$country_id
Pred.Ensemble.Task1.s3$country_name <- ged.pred.true$country_name

write.csv(Pred.Ensemble.Task1.s3, file = "Pred_Ensemble_Task1_s3.csv",
          row.names = FALSE)

### add africa.only s-3
Pred.Ensemble.Task1.s3 <- read.csv("Pred_Ensemble_Task1_s3.csv", header = TRUE)


Pred.Ensemble.Task1.s3.africa <- Pred.Ensemble.Task1.s3
Pred.Ensemble.Task1.s3.africa$in_africa <- ged.pred.true[, 6]
Pred.Ensemble.Task1.s3.africa <- Pred.Ensemble.Task1.s3.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s3.africa$RF.GED.t1.s3.africa <- pr.t1.ged.s3[["predicted"]]

write.csv(Pred.Ensemble.Task1.s3.africa,
          file = "Pred_Ensemble_Task1_s3_africa.csv", row.names = FALSE)

# Difference between the appended RF column and the one already in the table.
MSE(y_pred = Pred.Ensemble.Task1.s3.africa$RF.GED.t1.s3.africa,
    y_true = Pred.Ensemble.Task1.s3.africa$RF.GED.t1.s3)





## s-4
Pred.Ensemble.Task1.s4 <- setNames(
  as.data.frame(pr.t1.ged.s4[["predicted"]]),
  "RF.GED.t1.s4"
)

Pred.Ensemble.Task1.s4$XGB.GED.t1.s4 <- xgb.pre.t1.frame$xgb.pre.t1

# Identifiers come from the prediction rows; month_id is moved forward by 4.
Pred.Ensemble.Task1.s4$month_id     <- ged.pred.true$month_id + 4
Pred.Ensemble.Task1.s4$country_id   <- ged.pred.true$country_id
Pred.Ensemble.Task1.s4$country_name <- ged.pred.true$country_name

write.csv(Pred.Ensemble.Task1.s4, file = "Pred_Ensemble_Task1_s4.csv",
          row.names = FALSE)

### add africa.only s-4
Pred.Ensemble.Task1.s4 <- read.csv("Pred_Ensemble_Task1_s4.csv", header = TRUE)


Pred.Ensemble.Task1.s4.africa <- Pred.Ensemble.Task1.s4
Pred.Ensemble.Task1.s4.africa$in_africa <- ged.pred.true[, 6]
Pred.Ensemble.Task1.s4.africa <- Pred.Ensemble.Task1.s4.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s4.africa$RF.GED.t1.s4.africa <- pr.t1.ged.s4[["predicted"]]

write.csv(Pred.Ensemble.Task1.s4.africa,
          file = "Pred_Ensemble_Task1_s4_africa.csv", row.names = FALSE)

MSE(y_pred = Pred.Ensemble.Task1.s4.africa$RF.GED.t1.s4.africa,
    y_true = Pred.Ensemble.Task1.s4.africa$RF.GED.t1.s4)

### add africa.only s-5
Pred.Ensemble.Task1.s5 <- read.csv("Pred_Ensemble_Task1_s5.csv", header = TRUE)


Pred.Ensemble.Task1.s5.africa <- Pred.Ensemble.Task1.s5
Pred.Ensemble.Task1.s5.africa$in_africa <- ged.pred.true[, 6]
Pred.Ensemble.Task1.s5.africa <- Pred.Ensemble.Task1.s5.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s5.africa$RF.GED.t1.s5.africa <- pr.t1.ged.s5[["predicted"]]

write.csv(Pred.Ensemble.Task1.s5.africa,
          file = "Pred_Ensemble_Task1_s5_africa.csv", row.names = FALSE)

MSE(y_pred = Pred.Ensemble.Task1.s5.africa$RF.GED.t1.s5.africa,
    y_true = Pred.Ensemble.Task1.s5.africa$RF.GED.t1.s5)

### add africa.only s-6
Pred.Ensemble.Task1.s6 <- read.csv("Pred_Ensemble_Task1_s6.csv", header = TRUE)


Pred.Ensemble.Task1.s6.africa <- Pred.Ensemble.Task1.s6
Pred.Ensemble.Task1.s6.africa$in_africa <- ged.pred.true[, 6]
Pred.Ensemble.Task1.s6.africa <- Pred.Ensemble.Task1.s6.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s6.africa$RF.GED.t1.s6.africa <- pr.t1.ged.s6[["predicted"]]

write.csv(Pred.Ensemble.Task1.s6.africa,
          file = "Pred_Ensemble_Task1_s6_africa.csv", row.names = FALSE)

MSE(y_pred = Pred.Ensemble.Task1.s6.africa$RF.GED.t1.s6.africa,
    y_true = Pred.Ensemble.Task1.s6.africa$RF.GED.t1.s6)


### add africa.only s-7
Pred.Ensemble.Task1.s7 <- read.csv("Pred_Ensemble_Task1_s7.csv", header = TRUE)


Pred.Ensemble.Task1.s7.africa <- Pred.Ensemble.Task1.s7
Pred.Ensemble.Task1.s7.africa$in_africa <- ged.pred.true[, 6]
Pred.Ensemble.Task1.s7.africa <- Pred.Ensemble.Task1.s7.africa %>%
  filter(in_africa == 1)

Pred.Ensemble.Task1.s7.africa$RF.GED.t1.s7.africa <- pr.t1.ged.s7[["predicted"]]

write.csv(Pred.Ensemble.Task1.s7.africa,
          file = "Pred_Ensemble_Task1_s7_africa.csv", row.names = FALSE)

MSE(y_pred = Pred.Ensemble.Task1.s7.africa$RF.GED.t1.s7.africa,
    y_true = Pred.Ensemble.Task1.s7.africa$RF.GED.t1.s7)




## old setup s-2
# Legacy collection code for the October 2020 (s2) forecast.
# NOTE(review): pred.s2.true, xgb.pre.s2.true.frame, ged.pred.s2.monthid.490
# and Pred.Ensemble.November2020 are not created anywhere in this file; they
# have to exist in the workspace already.
Pred.Ensemble.October2020 <- as.data.frame(pred.s2.true[["predicted"]])
colnames(Pred.Ensemble.October2020) <- c("RF.GED.s2.true")

Pred.Ensemble.October2020$XGBoost.GED.s2.true <- xgb.pre.s2.true.frame$xgb.pre.s2.true

write.csv(Pred.Ensemble.October2020, file = "Pred_Ensemble_Oct2020.csv",
          row.names = FALSE)


####
# Identifiers are added after the CSV was written, so they are not part of
# "Pred_Ensemble_Oct2020.csv". month_id is moved forward by 2.
Pred.Ensemble.October2020$month_id <- ged.pred.s2.monthid.490$month_id + 2
Pred.Ensemble.October2020$country_id <- ged.pred.s2.monthid.490$country_id
Pred.Ensemble.October2020$country_name <- ged.pred.s2.monthid.490$country_name

# reorder by column index
# `data` is not defined in this file. Without a data frame of that name in the
# workspace it resolves to the utils::data() function, and subsetting a
# function is an error, so the template line only runs when such a data frame
# exists.
if (is.data.frame(data)) {
  data <- data[c(1, 3, 2)]
}

####
# `==` compares; the former `month_id=490` was a named argument, which
# filter() rejects with an error.
obj <- filter(Pred.Ensemble.November2020, month_id == 490)
# left_join obj

# Compare the two October models using the column names created above; the
# former names (RF.GED.s2.monthid.490 / XGBoost.GED.s2.monthid.490) do not
# exist in this table, so the comparison silently evaluated to NaN.
MSE(Pred.Ensemble.October2020$RF.GED.s2.true,
    Pred.Ensemble.October2020$XGBoost.GED.s2.true)
MSE(Pred.Ensemble.Task1.s3$XGB.GED.t1.s3, Pred.Ensemble.Task1.s3$RF.GED.t1.s3)


